German Tanks

Author

Oliver d’Pug

Simulate a Number of Battles

The function tanks simulates n_reps battles with n_obs serial numbers recorded for each battle. The argument n_tanks is the number of tanks that the Germans had. The argument fixedest is the expert’s best guess.

Code
tanks <- function (n_tanks = 84, n_obs = 5, n_reps = 100, fixedest = 87) 
{
    temp <- as.matrix(rep(n_tanks, n_reps))
    temp <- apply(temp, 1, sample, size = n_obs)
    temp <- t(apply(temp, 2, tanks.ests, fixedest))
    temp
}

Compute Estimates

The function tank.ests runs on a sample of observed serial numbers. Each of the vector values is the result of a student supplied estimator. These need to be changed to reflect the class’ ideas.

Code
tanks.ests <- function (x = stop("Argument 'x' is missing"), fixedest = 125) 
{
    n <- length(x)
    nodd <- n %% 2 == 1
    ests <- rep(NA, 16)
    xbar <- mean(x)
    xmedian <- median(x)
    xn <- max(x)
    xvar <- var(x)
    xstddev <- sqrt(xvar)
    x1 <- min(x)
    rng <- c(-1,1)%*%range(x)
    xsum <- sum(x)
    xnm1 <- sort(x)[n-1]
    
    ests[1] <- 2 * xbar
    ests[2] <- 2 * xmedian
    ests[3] <- xbar + xmedian
    ests[4] <- sum(x[c(n-1,n)])
    ests[5] <- nodd * (xmedian + x[(n+1)/2 + 1]) + !nodd * (xmedian + x[n/2 + 1])
    ests[6] <- xn + xmedian
    ests[7] <- xbar + 2*xstddev
    #
      y = sort(x)
      z = c(NA,y[-n])
    ests[8] <- xn + mean(y-z, na.rm = TRUE)
      z = c(0,y[-n])
    ests[9] <- xn + mean(y-z)
      z = c(1,y[-n])
    ests[10] <- xn + mean(y-z)
    ests[11] <- (3*xn - x1)/2
    ests[12] <- (5*xn - xbar)/3
    ests[13] <- 3*xmedian + rng
    ests[14] <- xn
    ests[15] <- xn * ((n+1)/n) - 1
    ests[16] <- fixedest 
    names(ests) <- c("2*xbar", "2*m", "xbar + m", "x[n]+x[n-1]", "m + next biggest", 
                     "x[n] + m", "xbar + 2s", "x[n]+mean(x -lag(x))", "x[n]+mean(x -lag(x) w/ 0)", 
                     "x[n]+mean(x -lag(x) w/ 1)", "(3x[n]-x[1])/2", "(5*x[n]-xbar)/3", "3m + range(x)", 
                     "x[n]", "UMVUE", fixedest)
    ests   
    
    #ests[1] <- xn
    #ests[2] <- 2 * xbar
    #ests[3] <- xn + x1
    #ests[4] <- xn + rng/2
    #ests[5] <- xbar + xstddev
    #ests[6] <- 2 * xmedian
    #ests[7] <- xn + xnm1
    #ests[8] <- 2*xn - x1
    #ests[9] <- xsum
    #ests[10] <- xn * ((n+1)/n) - 1
    #ests[11] <- fixedest
    #names(ests) <- c("x[n]","2*xbar", "x[n]+x[1]", "x[n]+R/2", "xbar+s", "2*m", 
    #                 "x[n]+x[n-1]","2x[n]-x[1]","sum(x)", "UMVUE",fixedest)
    #ests
}

Compute Descriptive Statistics

The function tanks.descriptives computes descriptive statistics for each of the estimators in tanks.ests.

Code
tanks.descriptives <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    means <- apply(temp, 2, mean)
    stddevs <- sqrt(apply(temp, 2, var))
    medians <- apply(temp, 2, median)
    bias <- means - n
    mse <- bias^2 + stddevs^2
    t(cbind(means, stddevs, medians, bias, mse))
}

Plot Estimates

The individual estimates computed for the samples from each battle can be plotted. This allows us to compare location and dispersion statistics — center and spread. tanks.plots2 is intended to be an “improved” version of tanks.plots. Both plots use traditional lattice boxplots and there is a ggplot plot as well.

Code
### Load the lattice package
p_load(lattice)

tanks.plots <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    bwplot(factor(temp[, 2], labels = tanknames) ~ 
        temp[, 1], xlab = "Number of Tanks", 
        ylab = "Estimator")
}

tanks.plots2 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    bwplot(factor(temp[, 2], labels = tanknames) ~ 
        temp[, 1], xlab = "Number of Tanks", 
        ylab = "Estimator", panel = function (x , 
            y , vref = n, ... ) 
        {
            panel.bwplot(x, y)
            panel.abline(v = vref, lty = 2)
        }, vref = n)
}

p_load(ggplot2)
p_load(tidyverse)
tanks.plots3 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    tanknames <- attributes(temp)$dimnames[[2]]
    dims <- dim(temp)
    temp <- as.vector(t(temp))
    temp <- cbind(temp, rep(1:dims[2], dims[1]))
    temp <- as_tibble(temp)
    names(temp) <- c("Estimate", "Estimator")
    temp$Estimator <- factor(temp$Estimator)
    ggplot(temp, aes(x=Estimator, y=Estimate)) +
      geom_boxplot(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

tanks.plots4 <- function (n = 84, obs = 5, reps = 100, fixedest = 87) 
{
    temp <- tanks(n, obs, reps, fixedest)
    temp <- melt(temp)
    names(temp) <- c("RowID","Estimator","Estimate")
    ggplot(temp, aes(x=Estimator, y=Estimate)) +
      geom_boxplot(alpha=0.7) +
      stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
      theme(legend.position="none") +
      scale_fill_brewer(palette="Set1") +
      geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
      coord_flip()
}

Compare our Estimators

The class’ estimators can be compared using the functions defined above.

Code
  ### Compute descriptive stats
  tanks.descriptives(n = 47, obs = 5, reps = 10000, fixedest = 50)
           2*xbar       2*m  xbar + m x[n]+x[n-1] m + next biggest  x[n] + m
means    48.10448  48.09400  48.09924    48.15300         48.23620  64.11970
stddevs  11.68563  17.03625  13.93933    19.03241         18.61887  12.68711
medians  48.00000  48.00000  48.20000    48.00000         48.00000  65.00000
bias      1.10448   1.09400   1.09924     1.15300          1.23620  17.11970
mse     137.77391 291.43062 195.51321   363.56222        348.19067 454.04680
        xbar + 2s x[n]+mean(x -lag(x)) x[n]+mean(x -lag(x) w/ 0)
means   50.569001            48.079050                 48.087240
stddevs  9.112276             7.709197                  7.562589
medians 51.548247            50.000000                 50.400000
bias     3.569001             1.079050                  1.087240
mse     95.771347            60.596068                 58.374839
        x[n]+mean(x -lag(x) w/ 1) (3x[n]-x[1])/2 (5*x[n]-xbar)/3 3m + range(x)
means                   47.887240      56.085400        58.77042     104.16640
stddevs                  7.562589       9.332868         9.33617      26.77891
medians                 50.200000      58.000000        61.13333     104.00000
bias                     0.887240       9.085400        11.77042      57.16640
mse                     57.979943     169.646910       225.70686    3985.10711
             x[n]     UMVUE 50
means   40.072700 47.087240 50
stddevs  6.302157  7.562589  0
medians 42.000000 49.400000 50
bias    -6.927300  0.087240  3
mse     87.704672 57.200359  9
Code
  ### Plot the estimates from each of the estimators
  tanks.plots(n = 47, obs = 5, reps = 10000, fixedest = 50)

Code
  ### Plot the estimates from each of the estimators
  tanks.plots2(n = 47, obs = 5, reps = 10000, fixedest = 50)

Code
  ### Plot the estimates from each of the estimators
  tanks.plots4(n = 47, obs = 5, reps = 10000, fixedest = 50)

Note that \widehat{N} = X_{(n)} \frac{n+1}{n} - 1 is UMVUE for N when the X_i are i.i.d. DU(1,N).